In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [7]:
import os
os.getcwd()
Out[7]:
'C:\\Users\\Dominic\\OneDrive\\DataMining\\Final Project'
In [8]:
crashdata = pd.read_csv("Crashes.csv")
crashdata.describe(include="all")
Out[8]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Time Crash Location Cross Street Name ... Head On Collision Involved Run Off Road Involved Total Bicyclists Killed Pedestrian Or Bicyclist Involved Total Drivers Involved Total Occupants Involved Curve Related Total Bicyclists Involved Rural Or Urban Hit and Run Involved
count 3978596 3978597 3978597 3978597 3978597 3.978597e+06 3.957072e+06 3957072 3978378 2898060 ... 3978597 3978597 3.978597e+06 3978597 3.978597e+06 3.978597e+06 3978597 3.978597e+06 3978597 3305639
unique 3261992 21 5094 7 12 NaN NaN 1440 291328 234594 ... 2 2 NaN 2 NaN NaN 2 NaN 3 127
top S Bergen 02/12/2008 Friday October NaN NaN 16:00:00 GARDEN STATE PARKWAY US 1 ... No No NaN False NaN NaN No NaN Urban ["No","No"]
freq 207 430032 3050 663546 355711 NaN NaN 8899 116196 6584 ... 3881014 3646159 NaN 3875805 NaN NaN 3590030 NaN 2573010 2059799
mean NaN NaN NaN NaN NaN 2.012243e+03 1.331801e+01 NaN NaN NaN ... NaN NaN 4.398535e-05 NaN 1.750912e+00 2.117345e+00 NaN 7.875641e-03 NaN NaN
std NaN NaN NaN NaN NaN 3.961624e+00 5.141029e+00 NaN NaN NaN ... NaN NaN 6.669792e-03 NaN 7.100341e-01 1.444981e+00 NaN 9.374611e-02 NaN NaN
min NaN NaN NaN NaN NaN 2.006000e+03 0.000000e+00 NaN NaN NaN ... NaN NaN 0.000000e+00 NaN 0.000000e+00 0.000000e+00 NaN 0.000000e+00 NaN NaN
25% NaN NaN NaN NaN NaN 2.009000e+03 1.000000e+01 NaN NaN NaN ... NaN NaN 0.000000e+00 NaN 2.000000e+00 1.000000e+00 NaN 0.000000e+00 NaN NaN
50% NaN NaN NaN NaN NaN 2.012000e+03 1.400000e+01 NaN NaN NaN ... NaN NaN 0.000000e+00 NaN 2.000000e+00 2.000000e+00 NaN 0.000000e+00 NaN NaN
75% NaN NaN NaN NaN NaN 2.016000e+03 1.700000e+01 NaN NaN NaN ... NaN NaN 0.000000e+00 NaN 2.000000e+00 3.000000e+00 NaN 0.000000e+00 NaN NaN
max NaN NaN NaN NaN NaN 2.019000e+03 2.300000e+01 NaN NaN NaN ... NaN NaN 2.000000e+00 NaN 3.700000e+01 7.000000e+01 NaN 5.000000e+00 NaN NaN

11 rows × 61 columns

crashdata.head()
In [69]:
pd.set_option('display.max_columns', 500)
In [70]:
crashdata.head()
Out[70]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Time Crash Location Cross Street Name Distance To Cross Street DHTS Region_ Crash Type Dot Web Id Id Functional Class Intersection Latitude MPO Jurisdiction Municipality Police Department Pedestrians Killed Longitude Alcohol Involved Highway Type Environmental Condition Severity Hazmat Involved Light Condition Road Surface Type Road System Route SRI Surface Condition Total Suspected Serious Injuries Total Injured Total Killed Total Pedestrians Involved Total Vehicles Involved Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Young Driver (16-20) Involved Older Driver (65+) Involved Motorcycle Involved Unrestrained Occupant Involved Live Animal Involved Drugged Driver Involved Single Vehicle Crash Drowsy Driver Involved Head On Collision Involved Run Off Road Involved Total Bicyclists Killed Pedestrian Or Bicyclist Involved Total Drivers Involved Total Occupants Involved Curve Related Total Bicyclists Involved Rural Or Urban Hit and Run Involved
0 B130-2019-04124A Essex 12/12/2019 Thursday December 2019 2.0 02:57:00 I-78 NaN NaN North Fixed Object 20190714B130-2019-04124A 10112726 Urban Interstate Not Within Intersection Boundaries 40.710574 North Jersey Transportation Planning Authority N.J.D.O.T. Newark City NEW JERSEY STATE POLICE 0 -74.174822 Yes Dual/Dual Clear Suspected Minor Injury No Dark - Street Lights On (Spot) Blacktop Interstate 78.0 00000078 Dry 0.0 1 0 0.0 1 Yes No No No No No No No No Yes No No Yes 0 False 1 1 Yes 0 Urban NaN
1 B060-2019-03839A Bergen 12/12/2019 Thursday December 2019 NaN NaN I-287 NaN NaN North Same Direction - Rear End 20190242B060-2019-03839A 10112402 Urban Interstate Not Within Intersection Boundaries 41.023651 North Jersey Transportation Planning Authority N.J.D.O.T. Oakland Borough NEW JERSEY STATE POLICE 0 -74.268504 No Divided Clear Possible Injury No Dark - Street Lights Not Present Blacktop Interstate 287.0 00000287 Dry 0.0 2 0 0.0 2 Yes No No Yes No No No No No No Yes No No 0 False 2 2 No 0 Urban NaN
2 B060-2019-03838A Essex 12/11/2019 Wednesday December 2019 19.0 19:39:00 I-80 NaN NaN North Same Direction - Side Swipe 20190707B060-2019-03838A 10112656 Urban Interstate Not Within Intersection Boundaries 40.893689 North Jersey Transportation Planning Authority N.J.D.O.T. Fairfield Township NEW JERSEY STATE POLICE 0 -74.279018 No Divided Clear No Apparent Injury No Dark - Street Lights Not Present Blacktop Interstate 80.0 00000080 Dry 0.0 0 0 0.0 2 No No No No No No No No No No No No Yes 0 False 2 2 No 0 Urban Yes
3 B130-2019-04122A Somerset 12/11/2019 Wednesday December 2019 18.0 18:44:00 ROUTE 533 NaN NaN Central Fixed Object 20191812B130-2019-04122A 10113967 Urban Collector Not Within Intersection Boundaries 40.498190 North Jersey Transportation Planning Authority County Millstone Borough NEW JERSEY STATE POLICE 0 -74.588213 No Undivided Clear No Apparent Injury No Dark - Street Lights On (Continuous) Blacktop County 533.0 00000533 Dry 0.0 0 0 0.0 1 Yes No No No No No No No No Yes No No Yes 0 False 1 1 Yes 0 Urban NaN
4 A040-2019-00866A Cumberland 12/11/2019 Wednesday December 2019 18.0 18:17:00 LOVE LN ** NJ 77 25.0 South Fixed Object 20190613A040-2019-00866A 10113405 Urban Local Not Within Intersection Boundaries 39.471608 South Jersey Transportation Planning Organization Municipal Upper Deerfield Township NEW JERSEY STATE POLICE 0 -75.206745 No Undivided Clear No Apparent Injury No Dark - Street Lights Not Present Blacktop Municipal NaN 06131002 Dry 0.0 0 0 0.0 1 No No No No Yes No No No No Yes No No Yes 0 False 1 1 No 0 Urban NaN
In [71]:
demo=crashdata[["Case Number", "County"]]
demo.head(6)
Out[71]:
Case Number County
0 B130-2019-04124A Essex
1 B060-2019-03839A Bergen
2 B060-2019-03838A Essex
3 B130-2019-04122A Somerset
4 A040-2019-00866A Cumberland
5 B150-2019-00439A Warren
In [72]:
crashdata2 = crashdata[["Case Number", "County", "Crash Date", "Crash Day Of Week", "Crash Month", "Crash Year", "Crash Hour", "Crash Type", "Intersection", "Latitude", "Longitude","Pedestrians Killed", "Environmental Condition", "Severity", "Hazmat Involved", "Light Condition", "Surface Condition", "Distracted Driving Involved", "Unsafe Speed Involved", "Bicyclist Involved", "Rural Or Urban"]]
crashdata2.head()
Out[72]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Type Intersection Latitude Longitude Pedestrians Killed Environmental Condition Severity Hazmat Involved Light Condition Surface Condition Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Rural Or Urban
0 B130-2019-04124A Essex 12/12/2019 Thursday December 2019 2.0 Fixed Object Not Within Intersection Boundaries 40.710574 -74.174822 0 Clear Suspected Minor Injury No Dark - Street Lights On (Spot) Dry Yes No No Urban
1 B060-2019-03839A Bergen 12/12/2019 Thursday December 2019 NaN Same Direction - Rear End Not Within Intersection Boundaries 41.023651 -74.268504 0 Clear Possible Injury No Dark - Street Lights Not Present Dry Yes No No Urban
2 B060-2019-03838A Essex 12/11/2019 Wednesday December 2019 19.0 Same Direction - Side Swipe Not Within Intersection Boundaries 40.893689 -74.279018 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
3 B130-2019-04122A Somerset 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 40.498190 -74.588213 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
4 A040-2019-00866A Cumberland 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 39.471608 -75.206745 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
In [73]:
print(pd.isnull(crashdata2).sum())
Case Number                          1
County                               0
Crash Date                           0
Crash Day Of Week                    0
Crash Month                          0
Crash Year                           0
Crash Hour                       21525
Crash Type                        3361
Intersection                         0
Latitude                       1273789
Longitude                      1273789
Pedestrians Killed                   0
Environmental Condition          13149
Severity                             0
Hazmat Involved                      0
Light Condition                  14846
Surface Condition                11638
Distracted Driving Involved          0
Unsafe Speed Involved                0
Bicyclist Involved                   0
Rural Or Urban                       0
dtype: int64
In [74]:
crashdata2=crashdata2.dropna()
In [75]:
print(pd.isnull(crashdata2).sum())
Case Number                    0
County                         0
Crash Date                     0
Crash Day Of Week              0
Crash Month                    0
Crash Year                     0
Crash Hour                     0
Crash Type                     0
Intersection                   0
Latitude                       0
Longitude                      0
Pedestrians Killed             0
Environmental Condition        0
Severity                       0
Hazmat Involved                0
Light Condition                0
Surface Condition              0
Distracted Driving Involved    0
Unsafe Speed Involved          0
Bicyclist Involved             0
Rural Or Urban                 0
dtype: int64
In [76]:
crashdata2
Out[76]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Type Intersection Latitude Longitude Pedestrians Killed Environmental Condition Severity Hazmat Involved Light Condition Surface Condition Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Rural Or Urban
0 B130-2019-04124A Essex 12/12/2019 Thursday December 2019 2.0 Fixed Object Not Within Intersection Boundaries 40.710574 -74.174822 0 Clear Suspected Minor Injury No Dark - Street Lights On (Spot) Dry Yes No No Urban
2 B060-2019-03838A Essex 12/11/2019 Wednesday December 2019 19.0 Same Direction - Side Swipe Not Within Intersection Boundaries 40.893689 -74.279018 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
3 B130-2019-04122A Somerset 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 40.498190 -74.588213 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
4 A040-2019-00866A Cumberland 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 39.471608 -75.206745 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
6 E040-2019-05221A Bergen 12/11/2019 Wednesday December 2019 17.0 Same Direction - Rear End Not Within Intersection Boundaries 41.020455 -74.068621 0 Clear Possible Injury No Dark - Street Lights Not Present Dry No No No Urban
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3978586 2006-0001 Somerset 01/01/2006 Sunday January 2006 0.0 Fixed Object Not Within Intersection Boundaries 40.661751 -74.641944 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Wet Yes No No Urban
3978587 A140200600013A Salem 01/01/2006 Sunday January 2006 0.0 Non-fixed Object Not Within Intersection Boundaries 39.615267 -75.327055 0 Rain No Apparent Injury No Dark - Street Lights Not Present Wet No No No Rural
3978588 C080-2006-00001A Burlington 01/01/2006 Sunday January 2006 0.0 Fixed Object Not Within Intersection Boundaries 39.885843 -74.703062 0 Overcast No Apparent Injury No Dark - Street Lights On (Continuous) Wet No Yes No Urban
3978589 B060-2006-00001A Passaic 01/01/2006 Sunday January 2006 0.0 Fixed Object Not Within Intersection Boundaries 40.901131 -74.140198 0 Snow No Apparent Injury No Dark - Street Lights On (Continuous) Wet Yes No No Urban
3978596 2006-0025 Monmouth 01/01/2006 Sunday January 2006 0.0 Struck Parked Vehicle Not Within Intersection Boundaries 40.238771 -74.027908 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry Yes No No Urban

2676066 rows × 21 columns

In [77]:
newData= crashdata2[crashdata2['Crash Year']>2014]
In [78]:
newData
newData.to_csv(r'C:\Users\deepd\Desktop\newData.csv', index=False)
In [79]:
newData
Out[79]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Type Intersection Latitude Longitude Pedestrians Killed Environmental Condition Severity Hazmat Involved Light Condition Surface Condition Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Rural Or Urban
0 B130-2019-04124A Essex 12/12/2019 Thursday December 2019 2.0 Fixed Object Not Within Intersection Boundaries 40.710574 -74.174822 0 Clear Suspected Minor Injury No Dark - Street Lights On (Spot) Dry Yes No No Urban
2 B060-2019-03838A Essex 12/11/2019 Wednesday December 2019 19.0 Same Direction - Side Swipe Not Within Intersection Boundaries 40.893689 -74.279018 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
3 B130-2019-04122A Somerset 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 40.498190 -74.588213 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
4 A040-2019-00866A Cumberland 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 39.471608 -75.206745 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
6 E040-2019-05221A Bergen 12/11/2019 Wednesday December 2019 17.0 Same Direction - Rear End Not Within Intersection Boundaries 41.020455 -74.068621 0 Clear Possible Injury No Dark - Street Lights Not Present Dry No No No Urban
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1311922 P15000052 Essex 01/01/2015 Thursday January 2015 0.0 Fixed Object Not Within Intersection Boundaries 40.741430 -74.191868 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes Yes No Urban
1311924 15NT00003 Monmouth 01/01/2015 Thursday January 2015 0.0 Fixed Object Not Within Intersection Boundaries 40.227818 -74.049848 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry Yes No No Urban
1311925 000003-15 Hudson 01/01/2015 Thursday January 2015 0.0 Right Angle Within Intersection Boundaries 40.705579 -74.085315 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
1311926 E040-2015-00001A Union 01/01/2015 Thursday January 2015 0.0 Animal Not Within Intersection Boundaries 40.655093 -74.287141 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
1311928 15-00001 Middlesex 01/01/2015 Thursday January 2015 0.0 Fixed Object Within Intersection Boundaries 40.437335 -74.391486 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban

890199 rows × 21 columns

In [80]:
uniqueSeverity=pd.unique(newData["Severity"])
uniqueSeverity
Out[80]:
array(['Suspected Minor Injury', 'No Apparent Injury', 'Possible Injury',
       'Suspected Serious Injury', 'Fatal Injury'], dtype=object)
In [81]:
uniqueSeverity=pd.unique(newData["Crash Month"])
uniqueSeverity
Out[81]:
array(['December', 'November', 'October', 'September', 'August', 'July',
       'June', 'May', 'April', 'March', 'February', 'January'],
      dtype=object)
In [82]:
uniqueSeverity=pd.unique(newData["Crash Day Of Week"])
uniqueSeverity
Out[82]:
array(['Thursday', 'Wednesday', 'Tuesday', 'Monday', 'Sunday', 'Saturday',
       'Friday'], dtype=object)
In [83]:
uniqueSeverity=pd.unique(newData["Crash Date"])
uniqueSeverity
Out[83]:
array(['12/12/2019', '12/11/2019', '12/10/2019', ..., '01/03/2015',
       '01/02/2015', '01/01/2015'], dtype=object)
In [84]:
uniqueSeverity=pd.unique(newData["County"])
uniqueSeverity
Out[84]:
array(['Essex', 'Somerset', 'Cumberland', 'Bergen', 'Middlesex',
       'Atlantic', 'Union', 'Ocean', 'Sussex', 'Hudson', 'Morris',
       'Hunterdon', 'Passaic', 'Warren', 'Salem', 'Gloucester', 'Camden',
       'Monmouth', 'Mercer', 'Burlington', 'Cape May'], dtype=object)
In [85]:
uniqueSeverity=pd.unique(newData["Crash Year"])
uniqueSeverity
Out[85]:
array([2019, 2018, 2017, 2016, 2015], dtype=int64)
In [86]:
uniqueSeverity=pd.unique(newData["Crash Hour"])
uniqueSeverity
Out[86]:
array([ 2., 19., 18., 17., 16., 14., 10.,  9.,  8.,  7.,  6.,  5.,  4.,
       15., 21., 13., 12., 11., 23., 22.,  3., 20.,  0.,  1.])
In [87]:
uniqueSeverity=pd.unique(newData["Crash Type"])
uniqueSeverity
Out[87]:
array(['Fixed Object', 'Same Direction - Side Swipe',
       'Same Direction - Rear End', 'Right Angle', 'Backing', 'Animal',
       'Non-fixed Object', 'Other', 'Pedestrian',
       'Opposite Direction - Side Swipe', 'Overturned',
       'Left Turn / U Turn', 'Struck Parked Vehicle', 'Encroachment',
       'Opposite Direction - Head On/Angular', 'Pedalcyclist',
       'Railcar-Vehicle', 'Unknown'], dtype=object)
In [88]:
uniqueSeverity=pd.unique(newData["Intersection"])
uniqueSeverity
Out[88]:
array(['Not Within Intersection Boundaries',
       'Within Intersection Boundaries', 'At or near Railroad Crossing'],
      dtype=object)
In [89]:
uniqueSeverity=pd.unique(newData["Pedestrians Killed"])
uniqueSeverity
Out[89]:
array([0, 1, 2], dtype=int64)
In [90]:
uniqueSeverity=pd.unique(newData["Hazmat Involved"])
uniqueSeverity
Out[90]:
array(['No', 'Yes'], dtype=object)
In [91]:
uniqueSeverity=pd.unique(newData["Light Condition"])
uniqueSeverity
Out[91]:
array(['Dark - Street Lights On (Spot)',
       'Dark - Street Lights Not Present',
       'Dark - Street Lights On (Continuous)', 'Daylight',
       'Dark - Street Lights Off', 'Dusk', 'Dawn', 'Unknown'],
      dtype=object)
In [92]:
uniqueSeverity=pd.unique(newData["Surface Condition"])
uniqueSeverity
Out[92]:
array(['Dry', 'Wet', 'Snowy', 'Icy', 'Slush', 'Unknown', 'Other',
       'Water (Standing or Moving)', 'Sand', 'Oil/Fuel',
       'Mud, Dirt, Gravel', 'Oil', 'Sand / Mud / Dirt'], dtype=object)
In [93]:
uniqueSeverity=pd.unique(newData["Distracted Driving Involved"])
uniqueSeverity
Out[93]:
array(['Yes', 'No'], dtype=object)
In [94]:
uniqueSeverity=pd.unique(newData["Unsafe Speed Involved"])
uniqueSeverity
Out[94]:
array(['No', 'Yes'], dtype=object)
In [95]:
uniqueSeverity=pd.unique(newData["Bicyclist Involved"])
uniqueSeverity
Out[95]:
array(['No', 'Yes'], dtype=object)
In [96]:
uniqueSeverity=pd.unique(newData["Rural Or Urban"])
uniqueSeverity
Out[96]:
array(['Urban', 'Rural', 'Unknown'], dtype=object)
In [97]:
uniqueSeverity=pd.unique(newData["Environmental Condition"])
uniqueSeverity
Out[97]:
array(['Clear', 'Snow', 'Rain', 'Overcast', 'Sleet / Hail',
       'Freezing Rain', 'Fog / Smog / Smoke', 'Unknown',
       'Severe Crosswinds', 'Other', 'Blowing Sand / Dirt',
       'Blowing Snow', 'Sleet / Hail / Freezing Rain'], dtype=object)
In [98]:
i=newData[newData["Environmental Condition"]=='Unknown'].index
newData.drop(i, inplace=True)
i=newData[newData["Crash Type"]=='Unknown'].index
newData.drop(i, inplace=True)
i=newData[newData["Light Condition"]=='Unknown'].index
newData.drop(i, inplace=True)
i=newData[newData["Surface Condition"]=='Unknown'].index
newData.drop(i, inplace=True)
i=newData[newData["Rural Or Urban"]=='Unknown'].index
newData.drop(i, inplace=True)
i=newData[newData["Pedestrians Killed"]==1].index
newData.drop(i, inplace=True)
i=newData[newData["Pedestrians Killed"]==2].index
newData.drop(i, inplace=True)
In [99]:
uniqueSeverity=pd.unique(newData["Environmental Condition"])
uniqueSeverity
Out[99]:
array(['Clear', 'Snow', 'Rain', 'Overcast', 'Sleet / Hail',
       'Freezing Rain', 'Fog / Smog / Smoke', 'Severe Crosswinds',
       'Other', 'Blowing Sand / Dirt', 'Blowing Snow',
       'Sleet / Hail / Freezing Rain'], dtype=object)
In [100]:
uniqueSeverity=pd.unique(newData["Crash Type"])
uniqueSeverity
Out[100]:
array(['Fixed Object', 'Same Direction - Side Swipe',
       'Same Direction - Rear End', 'Right Angle', 'Backing', 'Animal',
       'Non-fixed Object', 'Other', 'Pedestrian',
       'Opposite Direction - Side Swipe', 'Overturned',
       'Left Turn / U Turn', 'Struck Parked Vehicle', 'Encroachment',
       'Opposite Direction - Head On/Angular', 'Pedalcyclist',
       'Railcar-Vehicle'], dtype=object)
In [101]:
uniqueSeverity=pd.unique(newData["Rural Or Urban"])
uniqueSeverity
Out[101]:
array(['Urban', 'Rural'], dtype=object)
In [102]:
uniqueSeverity=pd.unique(newData["Surface Condition"])
uniqueSeverity
Out[102]:
array(['Dry', 'Wet', 'Snowy', 'Icy', 'Slush', 'Other',
       'Water (Standing or Moving)', 'Sand', 'Oil/Fuel',
       'Mud, Dirt, Gravel', 'Oil', 'Sand / Mud / Dirt'], dtype=object)
In [103]:
uniqueSeverity=pd.unique(newData["Light Condition"])
uniqueSeverity
Out[103]:
array(['Dark - Street Lights On (Spot)',
       'Dark - Street Lights Not Present',
       'Dark - Street Lights On (Continuous)', 'Daylight',
       'Dark - Street Lights Off', 'Dusk', 'Dawn'], dtype=object)
In [104]:
uniqueSeverity=pd.unique(newData["Pedestrians Killed"])
uniqueSeverity
Out[104]:
array([0], dtype=int64)
In [105]:
newData
Out[105]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Type Intersection Latitude Longitude Pedestrians Killed Environmental Condition Severity Hazmat Involved Light Condition Surface Condition Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Rural Or Urban
0 B130-2019-04124A Essex 12/12/2019 Thursday December 2019 2.0 Fixed Object Not Within Intersection Boundaries 40.710574 -74.174822 0 Clear Suspected Minor Injury No Dark - Street Lights On (Spot) Dry Yes No No Urban
2 B060-2019-03838A Essex 12/11/2019 Wednesday December 2019 19.0 Same Direction - Side Swipe Not Within Intersection Boundaries 40.893689 -74.279018 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
3 B130-2019-04122A Somerset 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 40.498190 -74.588213 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
4 A040-2019-00866A Cumberland 12/11/2019 Wednesday December 2019 18.0 Fixed Object Not Within Intersection Boundaries 39.471608 -75.206745 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
6 E040-2019-05221A Bergen 12/11/2019 Wednesday December 2019 17.0 Same Direction - Rear End Not Within Intersection Boundaries 41.020455 -74.068621 0 Clear Possible Injury No Dark - Street Lights Not Present Dry No No No Urban
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1311922 P15000052 Essex 01/01/2015 Thursday January 2015 0.0 Fixed Object Not Within Intersection Boundaries 40.741430 -74.191868 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes Yes No Urban
1311924 15NT00003 Monmouth 01/01/2015 Thursday January 2015 0.0 Fixed Object Not Within Intersection Boundaries 40.227818 -74.049848 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry Yes No No Urban
1311925 000003-15 Hudson 01/01/2015 Thursday January 2015 0.0 Right Angle Within Intersection Boundaries 40.705579 -74.085315 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban
1311926 E040-2015-00001A Union 01/01/2015 Thursday January 2015 0.0 Animal Not Within Intersection Boundaries 40.655093 -74.287141 0 Clear No Apparent Injury No Dark - Street Lights Not Present Dry No No No Urban
1311928 15-00001 Middlesex 01/01/2015 Thursday January 2015 0.0 Fixed Object Within Intersection Boundaries 40.437335 -74.391486 0 Clear No Apparent Injury No Dark - Street Lights On (Continuous) Dry Yes No No Urban

886015 rows × 21 columns

In [106]:
HazmatInvolved_mapping= {"Hazmat Involved":{"No":0, "Yes":1}}
newData.replace(HazmatInvolved_mapping, inplace=True)
In [107]:
LightCondition_mapping={'Light Condition': {"Dark - Street Lights On (Spot)": 1, "Dark - Street Lights Not Present": 1, "Dark - Street Lights On (Continuous)": 1, "Dark - Street Lights Off": 1, "Daylight": 2, "Dusk": 3, "Dawn": 4}}
newData.replace(LightCondition_mapping, inplace= True)
In [108]:
RuralOrUrban_mapping= {"Rural Or Urban":{"Rural":0, "Urban":1}}
newData.replace(RuralOrUrban_mapping, inplace=True)
In [109]:
BicyclistInvolved_mapping= {"Bicyclist Involved":{"No":0, "Yes":1}}
newData.replace(BicyclistInvolved_mapping, inplace=True)
In [110]:
UnsafeSpeedInvolved_mapping= {"Unsafe Speed Involved":{"No":0, "Yes":1}}
newData.replace(UnsafeSpeedInvolved_mapping, inplace=True)
In [111]:
DistractedDrivingInvolved_mapping= {"Distracted Driving Involved":{"No":0, "Yes":1}}
newData.replace(DistractedDrivingInvolved_mapping, inplace=True)
In [112]:
SurfaceCondition_mapping= {"Surface Condition":{"Dry":1, "Wet":2, "Snowy":3, "Icy":3, "Slush":3, "Water (Standing or Moving)":2, "Sand":4, "Oil/Fuel":5, "Oil":5, "Sand / Mud / Dirt":4, "Mud, Dirt, Gravel":4, "Other":6}}
newData.replace(SurfaceCondition_mapping, inplace=True)
In [113]:
Severity_mapping= {"Severity":{"Fatal Injury":1, "Suspected Serious Injury":1, "Possible Injury":0, "Suspected Minor Injury":0, "No Apparent Injury":0}}
newData.replace(Severity_mapping, inplace=True)
In [114]:
Intersection_mapping= {"Intersection":{"Not Within Intersection Boundaries":1, "Within Intersection Boundaries":2, "At or near Railroad Crossing":3}}
newData.replace(Intersection_mapping, inplace=True)
In [115]:
EnvironmentalCondition_mapping= {"Environmental Condition":{"Clear":1, "Snow":2, "Rain":3, "Overcast":4, "Sleet / Hail":3, "Freezing Rain":3, "Sleet / Hail / Freezing Rain":3, "Fog / Smog / Smoke":5, "Severe Crosswinds":6, "Blowing Sand / Dirt":6, "Blowing Snow":2, "Other":7}}
newData.replace(EnvironmentalCondition_mapping, inplace=True)
In [116]:
CrashType_mapping= {"Crash Type":{"Fixed Object":1, "Same Direction - Side Swipe":2, "Same Direction - Rear End":3, "Right Angle":4, "Backing":5, "Animal":6, "Non-fixed Object":7, "Pedestrian":8, "Opposite Direction - Side Swipe":9, "Overturned":10, "Blowing Snow":11, "Left Turn / U Turn":12, "Struck Parked Vehicle":13, "Encroachment":14, "Opposite Direction - Head On/Angular":15, "Pedalcyclist":16, "Railcar-Vehicle":17, "Other":18}}
newData.replace(CrashType_mapping, inplace=True)
In [132]:
County_mapping= {"County":{"Essex":1, "Somerset":2, "Cumberland":3, "Bergen":4, "Atlantic":5, "Union":6, "Ocean":7, "Sussex":8, "Hudson":9, "Morris":10, "Hunterdon":11, "Passaic":12, "Warren":13, "Salem":14, "Gloucester":15, "Camden":16, "Monmouth":17, "Mercer":18, "Burlington":19, "Cape May":20, "Middlesex":21}}
newData.replace(County_mapping, inplace=True)
C:\Users\deepd\Anaconda3\lib\site-packages\pandas\core\generic.py:6702: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  regex=regex,

CrashDayOfWeek_mapping= {"Crash Day Of Week":{"Sunday":1, "Monday":2, "Tuesday":3, "Wednesday":4, "Thursday":5, "Friday":6, "Saturday":7}}

newData.replace(CrashDayOfWeek_mapping, inplace=True)

In [134]:
CrashMonth_mapping= {"Crash Month":{"January":1, "February":2, "March":3, "April":4, "May":5, "June":6, "July":7, "August":8, "September":9, "October":10, "November":11, "December":12}}
newData.replace(CrashMonth_mapping, inplace=True)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-134-5a8ba67c178b> in <module>
      1 CrashMonth_mapping= {"Crash Month":{"January":1, "February":2, "March":3, "April":4, "May":5, "June":6, "July":7, "August":8, "September":9, "October":10, "November":11, "December":12}}
----> 2 newData.replace(CrashMonth_mapping, inplace=True)

~\Anaconda3\lib\site-packages\pandas\core\frame.py in replace(self, to_replace, value, inplace, limit, regex, method)
   4261             limit=limit,
   4262             regex=regex,
-> 4263             method=method,
   4264         )
   4265 

~\Anaconda3\lib\site-packages\pandas\core\generic.py in replace(self, to_replace, value, inplace, limit, regex, method)
   6680 
   6681             return self.replace(
-> 6682                 to_replace, value, inplace=inplace, limit=limit, regex=regex
   6683             )
   6684         else:

~\Anaconda3\lib\site-packages\pandas\core\frame.py in replace(self, to_replace, value, inplace, limit, regex, method)
   4261             limit=limit,
   4262             regex=regex,
-> 4263             method=method,
   4264         )
   4265 

~\Anaconda3\lib\site-packages\pandas\core\generic.py in replace(self, to_replace, value, inplace, limit, regex, method)
   6700                                 value=value[c],
   6701                                 inplace=False,
-> 6702                                 regex=regex,
   6703                             )
   6704                     return None if inplace else res

~\Anaconda3\lib\site-packages\pandas\core\series.py in replace(self, to_replace, value, inplace, limit, regex, method)
   4362             limit=limit,
   4363             regex=regex,
-> 4364             method=method,
   4365         )
   4366 

~\Anaconda3\lib\site-packages\pandas\core\generic.py in replace(self, to_replace, value, inplace, limit, regex, method)
   6734                         dest_list=value,
   6735                         inplace=inplace,
-> 6736                         regex=regex,
   6737                     )
   6738 

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in replace_list(self, src_list, dest_list, inplace, regex)
    610             return _compare_or_regex_search(values, s, regex)
    611 
--> 612         masks = [comp(s, regex) for i, s in enumerate(src_list)]
    613 
    614         result_blocks = []

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in <listcomp>(.0)
    610             return _compare_or_regex_search(values, s, regex)
    611 
--> 612         masks = [comp(s, regex) for i, s in enumerate(src_list)]
    613 
    614         result_blocks = []

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in comp(s, regex)
    608                     maybe_convert_objects(values), s.asm8, regex
    609                 )
--> 610             return _compare_or_regex_search(values, s, regex)
    611 
    612         masks = [comp(s, regex) for i, s in enumerate(src_list)]

~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _compare_or_regex_search(a, b, regex)
   1965         raise TypeError(
   1966             "Cannot compare types {a!r} and {b!r}".format(
-> 1967                 a=type_names[0], b=type_names[1]
   1968             )
   1969         )

TypeError: Cannot compare types 'ndarray(dtype=int64)' and 'str'
In [ ]:
 
In [135]:
newData
Out[135]:
Case Number County Crash Date Crash Day Of Week Crash Month Crash Year Crash Hour Crash Type Intersection Latitude Longitude Pedestrians Killed Environmental Condition Severity Hazmat Involved Light Condition Surface Condition Distracted Driving Involved Unsafe Speed Involved Bicyclist Involved Rural Or Urban
0 B130-2019-04124A 1 12/12/2019 5 12 2019 2.0 1 1 40.710574 -74.174822 0 1 0 0 1 1 1 0 0 1
2 B060-2019-03838A 1 12/11/2019 4 12 2019 19.0 2 1 40.893689 -74.279018 0 1 0 0 1 1 0 0 0 1
3 B130-2019-04122A 2 12/11/2019 4 12 2019 18.0 1 1 40.498190 -74.588213 0 1 0 0 1 1 1 0 0 1
4 A040-2019-00866A 3 12/11/2019 4 12 2019 18.0 1 1 39.471608 -75.206745 0 1 0 0 1 1 0 0 0 1
6 E040-2019-05221A 4 12/11/2019 4 12 2019 17.0 3 1 41.020455 -74.068621 0 1 0 0 1 1 0 0 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1311922 P15000052 1 01/01/2015 5 1 2015 0.0 1 1 40.741430 -74.191868 0 1 0 0 1 1 1 1 0 1
1311924 15NT00003 17 01/01/2015 5 1 2015 0.0 1 1 40.227818 -74.049848 0 1 0 0 1 1 1 0 0 1
1311925 000003-15 9 01/01/2015 5 1 2015 0.0 4 2 40.705579 -74.085315 0 1 0 0 1 1 1 0 0 1
1311926 E040-2015-00001A 6 01/01/2015 5 1 2015 0.0 6 1 40.655093 -74.287141 0 1 0 0 1 1 0 0 0 1
1311928 15-00001 21 01/01/2015 5 1 2015 0.0 1 2 40.437335 -74.391486 0 1 0 0 1 1 1 0 0 1

886015 rows × 21 columns

In [149]:
newData
newData.to_csv(r'C:\Users\deepd\Desktop\newData.csv', index=False)
In [136]:
uniqueSeverity=pd.unique(newData["Light Condition"])
uniqueSeverity
Out[136]:
array([1, 2, 3, 4], dtype=int64)
In [137]:
uniqueSeverity=pd.unique(newData["County"])
uniqueSeverity
Out[137]:
array([ 1,  2,  3,  4, 21,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20], dtype=int64)
In [138]:
newData2 = newData[["County", "Crash Day Of Week", "Crash Month", "Crash Type", "Intersection", "Environmental Condition", "Severity", "Hazmat Involved", "Light Condition", "Surface Condition", "Distracted Driving Involved", "Unsafe Speed Involved", "Bicyclist Involved", "Rural Or Urban"]]
In [139]:
print(newData2.info)
<bound method DataFrame.info of          County  Crash Day Of Week  Crash Month  Crash Type  Intersection  \
0             1                  5           12           1             1   
2             1                  4           12           2             1   
3             2                  4           12           1             1   
4             3                  4           12           1             1   
6             4                  4           12           3             1   
...         ...                ...          ...         ...           ...   
1311922       1                  5            1           1             1   
1311924      17                  5            1           1             1   
1311925       9                  5            1           4             2   
1311926       6                  5            1           6             1   
1311928      21                  5            1           1             2   

         Environmental Condition  Severity  Hazmat Involved  Light Condition  \
0                              1         0                0                1   
2                              1         0                0                1   
3                              1         0                0                1   
4                              1         0                0                1   
6                              1         0                0                1   
...                          ...       ...              ...              ...   
1311922                        1         0                0                1   
1311924                        1         0                0                1   
1311925                        1         0                0                1   
1311926                        1         0                0                1   
1311928                        1         0                0                1   

         Surface Condition  Distracted Driving Involved  \
0                        1                            1   
2                        1                            0   
3                        1                            1   
4                        1                            0   
6                        1                            0   
...                    ...                          ...   
1311922                  1                            1   
1311924                  1                            1   
1311925                  1                            1   
1311926                  1                            0   
1311928                  1                            1   

         Unsafe Speed Involved  Bicyclist Involved  Rural Or Urban  
0                            0                   0               1  
2                            0                   0               1  
3                            0                   0               1  
4                            0                   0               1  
6                            0                   0               1  
...                        ...                 ...             ...  
1311922                      1                   0               1  
1311924                      0                   0               1  
1311925                      0                   0               1  
1311926                      0                   0               1  
1311928                      0                   0               1  

[886015 rows x 14 columns]>
In [140]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import accuracy_score
import graphviz
In [141]:
predictors = newData2.drop(['Severity'], axis=1)
target = newData2["Severity"]
x_train, x_val, y_train, y_val = train_test_split(predictors, target, test_size = 0.20, random_state = 2)
In [143]:
decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_val)
acc_decisiontree = round(accuracy_score(y_pred, y_val) * 100, 2)
print(acc_decisiontree)
98.93
In [144]:
gaussian = GaussianNB()
gaussian.fit(x_train, y_train)
y_pred = gaussian.predict(x_val)
acc_gaussian = round(accuracy_score(y_pred, y_val) * 100, 2)
print(acc_gaussian)
97.75
In [145]:
log = LogisticRegression()
log = log.fit(x_train, y_train)
y_pred = log.predict(x_val)
acc_log = round(accuracy_score(y_pred, y_val) * 100, 2)
print(acc_log)
C:\Users\deepd\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
99.32
In [146]:
rf = RandomForestClassifier()
rf = rf.fit(x_train, y_train)
y_pred = rf.predict(x_val)
acc_rf = round(accuracy_score(y_pred, y_val) * 100, 2)
print(acc_rf)
C:\Users\deepd\Anaconda3\lib\site-packages\sklearn\ensemble\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
99.2
In [147]:
kn = KNeighborsClassifier()
kn = log.fit(x_train, y_train)
y_pred = kn.predict(x_val)
acc_kn = round(accuracy_score(y_pred, y_val) * 100, 2)
print(acc_kn)
C:\Users\deepd\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
99.32

dasdfsdaf

In [148]:
sns.heatmap(newData2.corr())
Out[148]:
<matplotlib.axes._subplots.AxesSubplot at 0x22eeb579488>
In [63]:
import os
import tarfile
from six.moves import urllib
In [19]:
type(newData.Latitude.tolist())
type(newData.Longitude.tolist())
Out[19]:
list
In [26]:
# from bokeh.io import output_file, output_notebook, show
# from bokeh.models import (
#   GMapPlot, GMapOptions, ColumnDataSource, Circle, LogColorMapper, BasicTicker, ColorBar,
#     Range1d, PanTool, WheelZoomTool, BoxSelectTool
# )
# from bokeh.models.mappers import ColorMapper, LinearColorMapper
# from bokeh.palettes import Viridis5


# map_options = GMapOptions(lat=40.710574, lng=-74.174822, map_type="roadmap", zoom=6)

# plot = GMapPlot(
#     x_range=Range1d(), y_range=Range1d(), map_options=map_options
# )
# plot.title.text = "NJ Crash PLOT!"

# plot.api_key = "AIzaSyB4Q3TJlLD4pPi0j4YMqf9cAeh2Ndm2FY4"

# source = ColumnDataSource(
#     data=dict(
#         lat=newData.Latitude.tolist(),
#         lon=newData.Longitude.tolist()        
#     )
# )

# #color_mapper = CategoricalColorMapper(factors=['hi', 'lo'], palette=[RdBu3[2], RdBu3[0]])
# #color_mapper = LogColorMapper(palette="Viridis5", low=min_median_house_value, high=max_median_house_value)
# color_mapper = LinearColorMapper(palette=Viridis5)

# circle = Circle(x="lon", y="lat", fill_color={'transform': color_mapper}, fill_alpha=0.5, line_color=None)
# plot.add_glyph(source, circle)

# #color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(),
#                      #label_standoff=12, border_line_color=None, location=(0,0))
# #plot.add_layout(color_bar, 'right')

# plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
# #output_file("gmap_plot.html")
# output_notebook()

# show(plot)
Loading BokehJS ...
In [29]:
# import matplotlib.pyplot as plt
# newData.plot(kind="scatter", x="Longitude", y="Latitude", alpha=0.25)
# plt.show()
In [ ]:
import matplotlib.image as mpimg
california_img=mpimg.imread('california.png')
ax = housing.plot(kind="scatter", x="longitude", y="latitude", figsize=(10,7),
                       s=housing['population']/100, label="Branch Customers",
                       c="total_bedrooms", cmap=plt.get_cmap("jet"),
                       colorbar=False, alpha=0.4,
                      )
plt.imshow(california_img, extent=[-124.55, -113.80, 32.45, 42.05], alpha=0.5)
plt.ylabel("", fontsize=14)
plt.xlabel("", fontsize=14)
plt.tick_params(colors='w')

prices = housing["median_house_value"]
cbar = plt.colorbar()
cbar.set_cmap("jet")
cbar.solids.set_edgecolor("face")
cbar.solids.set_cmap("jet")
cbar.set_label('Churn Probability', fontsize=16, alpha=1, 
               rotation=270, labelpad=20)

plt.legend(fontsize=16)
plt.show()